#install.packages("rlang")
#install.packages("ggplot2")
#install.packages("tidyverse")
#install.packages("dplyr")
#install.packages("stringr")
#install.packages("lubridate")
#install.packages("tidymodels")
#install.packages("ggmap")
#install.packages("devtools")
#install.packages("devtools")
#devtools::install_github("adror1/nwslR")
library(ggplot2)
library(lubridate)
library(dplyr)
library(tidyr)
library(broom)
library(praise)
library(plotly)
# Local aliases for the datasets exported by the nwslR package.
# Per-season statistics for field players (mp, starts, min, gls, ast, ...).
player_season_stats <- nwslR::fieldplayer_overall_season_stats
# Per-season team statistics (used below via team_id, season and goals).
team_data <- nwslR::team_stats_season
# Draft picks (used below via round and previous_team).
draft_data <- nwslR::draftpicks
# Per-game advanced player statistics (one row per player per game_id).
adv_player_stats <- nwslR::adv_player_stats
# Advanced team statistics -- NOTE(review): not used in the visible script.
adv_team_stats <- nwslR::adv_team_stats
# Awards -- NOTE(review): not used in the visible script.
awards <- nwslR::award
# Franchise table -- NOTE(review): not used in the visible script.
franchise <- nwslR::franchise
# Player lookup table: person_id, player, nation, pos, name_other.
players <- nwslR::player
# Attach the player lookup columns to the season stats. Where both tables
# carry nation/pos, keep the lookup table's copy (.x) under the plain name
# and discard the season-stats copy (.y) together with name_other.
player_season_stats <- players %>%
  inner_join(player_season_stats, by = "person_id", copy = FALSE) %>%
  select(-nation.y, -pos.y, -name_other) %>%
  rename(nation = nation.x, pos = pos.x)
# Goals per minute for every forward-season, grouped by season.
# Only rates strictly between 0 and 0.1 are kept, which drops scoreless
# seasons and implausibly high rates.
nwsl_goals_per_min <- player_season_stats %>%
  filter(pos == "FW") %>%
  group_by(season) %>%
  mutate(goals_per_min = gls / min) %>%
  filter(goals_per_min > 0 & goals_per_min < 0.1)
# Forwards' goals per minute: one point per player-season plus one box per
# season.
# - `group = season` is required because season is numeric; without it
#   geom_boxplot() pools every season into a single box.
# - `outlier.shape = NA` hides the boxplot outliers (the points layer already
#   shows every observation) and is understood by all ggplot2 versions.
# - `text = player` is not drawn by ggplot2 (it may note the unknown
#   aesthetic); it only carries the player's name into the plotly tooltip.
nwsl_goals_per_min_plot <- nwsl_goals_per_min %>%
  ggplot(aes(x = season, y = goals_per_min)) +
  geom_point(aes(text = player), color = "blue") +
  geom_boxplot(aes(group = season), color = "red", outlier.shape = NA)
ggplotly(nwsl_goals_per_min_plot, tooltip = "text")
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?
# Career figure per forward: the mean of her per-season goals/min values
# (2013 - 2019). Regrouping by player replaces the by-season grouping.
nwsl_goals_per_min_career <- summarise(
  group_by(nwsl_goals_per_min, player),
  goals_per_min = mean(goals_per_min)
)
nwsl_goals_per_min_career
## # A tibble: 87 x 2
## player goals_per_min
## * <chr> <dbl>
## 1 Abby Wambach 0.00719
## 2 Adriana 0.00251
## 3 Alex Morgan 0.00480
## 4 Alexa Newfield 0.00304
## 5 Allie Bailey 0.00794
## 6 Ana-Maria Crnogorčević 0.00243
## 7 Arielle Ship 0.0129
## 8 Ashleigh Sykes 0.00101
## 9 Ashley Hatch 0.00375
## 10 Bethany Balcer 0.00353
## # … with 77 more rows
# Look up Carli Lloyd's row in the player table.
filter(players, player == "Carli Lloyd")
## # A tibble: 1 x 5
## person_id player nation pos name_other
## <dbl> <chr> <chr> <chr> <chr>
## 1 76 Carli Lloyd USA MF <NA>
# All of Carli Lloyd's season rows.
filter(player_season_stats, player == "Carli Lloyd")
## # A tibble: 7 x 15
## person_id player nation pos season team_id mp starts min gls ast
## <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 76 Carli… USA MF 2013 WNY 15 14 8 0 NA
## 2 76 Carli… USA MF 2014 WNY 19 19 1710 8 5
## 3 76 Carli… USA MF 2015 HOU 12 12 1080 4 0
## 4 76 Carli… USA MF 2016 HOU 7 7 553 5 3
## 5 76 Carli… USA MF 2017 HOU 8 8 647 2 0
## 6 76 Carli… USA MF 2018 NJ 18 17 1563 4 1
## 7 76 Carli… USA MF 2019 NJ 14 14 1260 8 1
## # … with 4 more variables: pk <dbl>, p_katt <dbl>, crd_y <dbl>, crd_r <dbl>
# Carli Lloyd's goals per season as a scatter plot.
ggplot(
  filter(player_season_stats, player == "Carli Lloyd"),
  aes(x = season, y = gls)
) +
  geom_point(color = "red")
# Same goals-per-season plot, driven by a variable holding the player's name.
name <- "Alex Morgan"
ggplot(
  filter(player_season_stats, player == name),
  aes(x = season, y = gls)
) +
  geom_point(shape = "star", color = "red")
# Goals vs. assists for every player-season, with a smoothed trend line.
# (A per-team grouping, group_by(team_id), was tried and left disabled.)
p1 <- ggplot(player_season_stats, aes(x = gls, y = ast)) +
  geom_point(shape = "square", color = "blue") +
  geom_smooth(color = "red")
ggplotly(p1)
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
There appears to be a weak, positive correlation between goals and assists per season. The relationship weakens as goal totals rise: the highest-scoring players tend to record fewer assists. Let’s look at this by position:
# Number of player-season rows in each position category.
summarise(group_by(player_season_stats, pos), count = n())
## # A tibble: 6 x 2
## pos count
## * <chr> <int>
## 1 DF 353
## 2 DF,FW 40
## 3 DF,MF 129
## 4 FW 334
## 5 FW,MF 146
## 6 MF 348
# Assists-per-goal ratio by position category, outliers hidden.
# Rows where the ratio is not finite are dropped by the boxplot stat.
# (A per-position median via mutate() was tried and left disabled.)
ggplotly(
  ggplot(
    group_by(player_season_stats, pos),
    aes(x = pos, y = (ast/gls))
  ) +
    geom_boxplot(outlier.shape = NA)
)
## Warning: Removed 747 rows containing non-finite values (stat_boxplot).
Defenders have the lowest median assists-per-goal ratio, presumably because they rarely score or assist, while forwards have the second lowest, presumably because they score far more often than they assist. Strangely enough, the position category with the highest assists-to-goals ratio is the one for players who play both defense and forward. The most likely explanation is that players who fill both the DF and FW positions are deployed out wide, as wingers. They are therefore probably quick and skilled at crossing the ball, which leads to a higher assist ratio!
# Highest single-team goal total in each season, plotted over time.
# The original chained group_by(team_id) and group_by(season) (the second
# call replaces the first) and then called ggplot() without aes() or a
# layer on columns that no longer existed after summarize(), producing an
# empty plot. The summary column is now named and mapped explicitly.
maxgoals <- team_data %>%
  group_by(season) %>%
  summarize(max_goals = max(goals)) %>%
  ggplot(aes(x = season, y = max_goals)) +
  geom_point()
#ggplot(aes(x = maxgoals[["data"]][["season"]], y = maxgoals[["data"]][["max(goals)"]]))
# Best single-season goal total recorded by each team.
maxgoals <- summarize(group_by(team_data, team_id), max(goals))
maxgoals
## # A tibble: 12 x 2
## team_id `max(goals)`
## * <chr> <int>
## 1 BOS 24
## 2 CHI 41
## 3 HOU 35
## 4 KC 29
## 5 NC 54
## 6 NJ 42
## 7 ORL 45
## 8 POR 40
## 9 SEA 43
## 10 UTA 25
## 11 WAS 30
## 12 WNY 40
# Every team's season goal total against season, with a smoothed trend.
all_goals <- ggplot(
  group_by(team_data, season),
  aes(x = season, y = goals)
) +
  geom_point(color = "red") +
  geom_smooth(color = "black")
ggplotly(all_goals)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 2016
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.344e-17
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 4.0602
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 2016
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 2.015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 7.344e-17
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 4.0602
There does not appear to be a significant trend in overall goal scoring from 2016 to 2019. However, 2016 is noticeably lower than 2017, 2018, and 2019, which all have very similar median values.
Let’s explore some of the rounds & popular feeder university programs.
# Previous teams (mostly colleges) ranked by number of first-round picks.
top_feeders <- count(
  filter(draft_data, round == 1),
  previous_team,
  sort = TRUE
)
top_feeders
## previous_team n
## 1 UCLA 7
## 2 Florida State 6
## 3 Stanford 5
## 4 North Carolina 4
## 5 Duke University 3
## 6 Penn State 3
## 7 University of Southern California 3
## 8 Virginia 3
## 9 Florida 2
## 10 Pepperdine 2
## 11 Stanford University 2
## 12 Texas Tech 2
## 13 West Virginia University 2
## 14 ASA Chesapeake Charge, Penn State 1
## 15 Boston College 1
## 16 Boston College, Canberra United FC 1
## 17 Brigham Young University 1
## 18 BYU 1
## 19 California 1
## 20 Clemson 1
## 21 Harvard University 1
## 22 Illinois 1
## 23 Kentucky 1
## 24 Michigan 1
## 25 NC State 1
## 26 Notre Dame 1
## 27 Oklahoma State 1
## 28 Portland 1
## 29 Santa Clara 1
## 30 Santa Clara University 1
## 31 St. John's 1
## 32 Texas A&M 1
## 33 University of California 1
## 34 University of Colorado 1
## 35 University of Florida 1
## 36 University of Nebraska 1
## 37 University of Notre Dame 1
## 38 University of South Carolina 1
## 39 University of South Florida 1
## 40 University of Virginia 1
## 41 University of Wisconsin 1
## 42 USC 1
## 43 Washington State University 1
UCLA is the top provider of 1st round NWSL draft picks since 2013, closely followed by FSU and Stanford. Let’s explore the draft as a whole:
# Previous teams ranked by total number of draft picks, all rounds.
draft_colleges <- count(draft_data, previous_team, sort = TRUE)
draft_colleges
## previous_team n
## 1 UCLA 15
## 2 Penn State 12
## 3 Stanford 11
## 4 Florida State 10
## 5 North Carolina 10
## 6 Virginia 10
## 7 Duke University 9
## 8 University of Southern California 7
## 9 Florida 6
## 10 University of Virginia 6
## 11 Duke 5
## 12 Pepperdine 5
## 13 Santa Clara 5
## 14 University of Nebraska 5
## 15 University of North Carolina 5
## 16 Wake Forest 5
## 17 Boston College 4
## 18 Brigham Young University 4
## 19 Clemson 4
## 20 Notre Dame 4
## 21 Rutgers 4
## 22 Texas A&M 4
## 23 Texas Tech 4
## 24 Georgetown 3
## 25 Maryland 3
## 26 Northwestern 3
## 27 Portland 3
## 28 Rutgers University 3
## 29 Stanford University 3
## 30 UCF 3
## 31 University of California 3
## 32 University of Colorado 3
## 33 University of Florida 3
## 34 USC 3
## 35 West Virginia 3
## 36 Arizona State 2
## 37 BYU 2
## 38 Colorado 2
## 39 Colorado College 2
## 40 Georgetown University 2
## 41 Illinois 2
## 42 Kentucky 2
## 43 Michigan 2
## 44 Missouri 2
## 45 Ohio State University 2
## 46 University of California, Berkeley 2
## 47 University of Connecticut 2
## 48 University of Kansas 2
## 49 University of Washington 2
## 50 University of Wisconsin 2
## 51 Virginia Tech 2
## 52 Washington State University 2
## 53 West Virginia University 2
## 54 William & Mary 2
## 55 Arkansas 1
## 56 ASA Chesapeake Charge, Penn State 1
## 57 Auburn University 1
## 58 Baylor 1
## 59 Boston College, Canberra United FC 1
## 60 Bowling Green 1
## 61 Butler 1
## 62 Cal State Fullerton 1
## 63 California 1
## 64 Central Florida 1
## 65 Chicago Eclipse Select, Florida State 1
## 66 Chicago Eclipse Select, Stanford 1
## 67 D.C. United Women, Florida 1
## 68 Dayton 1
## 69 Denver 1
## 70 DePaul 1
## 71 DePaul University 1
## 72 Georgia, Lincoln Ladies FC 1
## 73 Harvard University 1
## 74 Hofstra 1
## 75 Illinois State 1
## 76 Illinois State University 1
## 77 James Madison 1
## 78 Kansas 1
## 79 Loyola–Chicago 1
## 80 LSU 1
## 81 Marquette 1
## 82 Marquette University 1
## 83 Minnesota 1
## 84 Mississippi 1
## 85 Mississippi State 1
## 86 Mississippi State University 1
## 87 Missouri State 1
## 88 NC State 1
## 89 Nebraska 1
## 90 New York Athletic Club, Princeton 1
## 91 Northeastern 1
## 92 Northern Colorado 1
## 93 Oklahoma State 1
## 94 Ole Miss 1
## 95 Oregon 1
## 96 Oregon State 1
## 97 Pepperdine University 1
## 98 Princeton University 1
## 99 San Francisco 1
## 100 Santa Clara University 1
## 101 Seattle 1
## 102 South Carolina 1
## 103 St. John's 1
## 104 TCU 1
## 105 Texas 1
## 106 UNC, Bayern Munich 1
## 107 University of Alabama 1
## 108 University of California, Irvine 1
## 109 University of Central Florida 1
## 110 University of Denver 1
## 111 University of Minnesota 1
## 112 University of Notre Dame 1
## 113 University of South Carolina 1
## 114 University of South Florida 1
## 115 University of Texas 1
## 116 Utah 1
## 117 Villanova 1
## 118 Washington State 1
## 119 William and Mary 1
## 120 Wisconsin 1
## 121 Yale 1
## 122 Yale University 1
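UCLA is again the top contender, but Penn State moves into second place, providing 12 NWSL draft picks since 2013. ***NOTE: I need to clean this data better before making any substantial statements about it. Several schools appear under more than one spelling (e.g. "Stanford" and "Stanford University", "Duke" and "Duke University"), so their picks are split across rows.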
# Full display name: first and last name joined by a single space.
adv_player_stats$player_name <- with(
  adv_player_stats,
  paste(first_name, last_name)
)
head(adv_player_stats$player_name)
## [1] "Michelle Betos" "Meghan Klingenberg" "Emily Menges"
## [4] "Emily Sonnett" "Katherine Reynolds" "Amandine Henry"
# Per-game passing accuracy: accurate passes as a share of all passes.
adv_player_stats$passing_acc <- with(
  adv_player_stats,
  accurate_pass / total_pass
)
head(adv_player_stats$passing_acc)
## [1] 0.8536585 0.8571429 0.9062500 0.8507463 0.7500000 0.8684211
# Rank players by their average per-game passing accuracy, best first.
# passing_acc can be missing for individual games (e.g. when total_pass is
# 0 or NA). Without na.rm a single such game turns the player's whole
# average into NA and pushes her to the bottom of the ranking, so missing
# games are skipped instead.
passing_accuracy <- adv_player_stats %>%
  group_by(player_name) %>%
  summarize(avg_passing_acc = mean(passing_acc, na.rm = TRUE)) %>%
  arrange(desc(avg_passing_acc))
passing_accuracy
## # A tibble: 440 x 2
## player_name avg_passing_acc
## <chr> <dbl>
## 1 Lisa De Vanna 0.844
## 2 Samantha Staab 0.824
## 3 Hope Solo 0.817
## 4 Kim Little 0.813
## 5 Kendall Lorraine Fletcher 0.807
## 6 Keelin Winters 0.800
## 7 Andi Sullivan 0.800
## 8 Veronica Boquete 0.793
## 9 Linda Motlhalo 0.791
## 10 Emily Menges 0.789
## # … with 430 more rows
The NWSL players with the statistically highest passing accuracy are: 1) Lisa De Vanna, 2) Samantha Staab, and 3) Hope Solo. Let’s check them out:
# Two-column lookup of person_id and player name.
# tibble() replaces the deprecated data_frame(); the columns are left
# unnamed on purpose so they keep the auto-generated names
# `players$person_id` and `players$player` that the rename() step expects.
player_name_and_id <- tibble(players$person_id, players$player)
player_name_and_id
## # A tibble: 555 x 2
## `players$person_id` `players$player`
## <dbl> <chr>
## 1 342 Marisa Abegg
## 2 117 Danesha Adams
## 3 6 Adriana
## 4 300 Leigh Ann Brown
## 5 202 Jazmyne Avant
## 6 28 Amy Barczuk
## 7 290 Lauren Barnes
## 8 56 Brittany Bock
## 9 313 Liz Bogus
## 10 363 Melanie Booth
## # … with 545 more rows
# Give the lookup columns usable names.
player_name_and_id <- rename(
  player_name_and_id,
  person_id = `players$person_id`,
  player = `players$player`
)
# Full join keeps players without season rows too. Both inputs have a
# `player` column, so the result carries player.x (lookup) and player.y.
player_season_stats <- player_name_and_id %>%
  full_join(player_season_stats, by = "person_id")
filter(player_season_stats, player.x == "Lisa De Vanna")
## # A tibble: 4 x 16
## person_id player.x player.y nation pos season team_id mp starts min
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 311 Lisa De… Lisa De… AUS FW 2013 NJ 16 15 NA
## 2 311 Lisa De… Lisa De… AUS FW 2014 WAS 11 9 794
## 3 311 Lisa De… Lisa De… AUS FW 2014 BOS 6 5 448
## 4 311 Lisa De… Lisa De… AUS FW 2016 ORL 3 2 153
## # … with 6 more variables: gls <dbl>, ast <dbl>, pk <dbl>, p_katt <dbl>,
## # crd_y <dbl>, crd_r <dbl>
# Per-game rows for Samantha Staab.
filter(adv_player_stats, player_name == "Samantha Staab")
## # A tibble: 24 x 269
## game_id status team_id first_name last_name person_id shirt_number position
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 portla… away WAS Samantha Staab 444 3 Defender
## 2 washin… home WAS Samantha Staab 444 3 Defender
## 3 housto… away WAS Samantha Staab 444 3 Defender
## 4 chicag… away WAS Samantha Staab 444 3 Defender
## 5 washin… home WAS Samantha Staab 444 3 Defender
## 6 orland… away WAS Samantha Staab 444 3 Defender
## 7 washin… home WAS Samantha Staab 444 3 Defender
## 8 washin… home WAS Samantha Staab 444 3 Defender
## 9 portla… away WAS Samantha Staab 444 3 Defender
## 10 washin… home WAS Samantha Staab 444 3 Defender
## # … with 14 more rows, and 261 more variables: position_side <chr>,
## # game_started <dbl>, mins_played <dbl>, formation_place <dbl>,
## # total_sub_on <dbl>, total_sub_off <dbl>, player_off_id <chr>,
## # player_on_id <chr>, sub_position <chr>, leftside_pass <dbl>,
## # accurate_keeper_sweeper <dbl>, accurate_pass <dbl>,
## # total_final_third_passes <dbl>, rightside_pass <dbl>,
## # attempts_conceded_ibox <dbl>, touches <dbl>, total_fwd_zone_pass <dbl>,
## # keeper_pick_up <dbl>, att_assist_openplay <dbl>,
## # accurate_fwd_zone_pass <dbl>, saves <dbl>, attempts_conceded_obox <dbl>,
## # ball_recovery <dbl>, turnover <dbl>, poss_won_def_3_rd <dbl>,
## # accurate_back_zone_pass <dbl>, successful_open_play_pass <dbl>,
## # total_back_zone_pass <dbl>, total_long_balls <dbl>,
## # accurate_keeper_throws <dbl>, goal_kicks <dbl>, open_play_pass <dbl>,
## # total_pass <dbl>, total_launches <dbl>, fwd_pass <dbl>,
## # ontarget_att_assist <dbl>, long_pass_own_to_opp <dbl>,
## # total_keeper_sweeper <dbl>, successful_final_third_passes <dbl>,
## # keeper_throws <dbl>, accurate_launches <dbl>, poss_lost_all <dbl>,
## # accurate_long_balls <dbl>, clean_sheet <dbl>, accurate_goal_kicks <dbl>,
## # saved_obox <dbl>, unsuccessful_touch <dbl>, poss_lost_ctrl <dbl>,
## # final_third_entries <dbl>, long_pass_own_to_opp_success <dbl>,
## # total_att_assist <dbl>, shot_faced <dbl>, duel_lost <dbl>,
## # blocked_scoring_att <dbl>, poss_won_att_3_rd <dbl>, dispossessed <dbl>,
## # accurate_cross <dbl>, att_rf_total <dbl>, won_tackle <dbl>,
## # total_chipped_pass <dbl>, lost_corners <dbl>, total_scoring_att <dbl>,
## # total_throws <dbl>, att_obx_centre <dbl>, att_openplay <dbl>,
## # poss_won_mid_3_rd <dbl>, freekick_cross <dbl>, touches_in_opp_box <dbl>,
## # accurate_chipped_pass <dbl>, duel_won <dbl>, total_cross_nocorner <dbl>,
## # total_tackle <dbl>, passes_left <dbl>, total_cross <dbl>,
## # att_obox_blocked <dbl>, head_pass <dbl>, crosses_18_yard <dbl>,
## # accurate_cross_nocorner <dbl>, effective_clearance <dbl>,
## # won_corners <dbl>, interception <dbl>, attempted_tackle_foul <dbl>,
## # backward_pass <dbl>, interception_won <dbl>, pen_area_entries <dbl>,
## # accurate_throws <dbl>, fouls <dbl>, total_clearance <dbl>,
## # crosses_18_yardplus <dbl>, total_shots <dbl>,
## # effective_blocked_cross <dbl>, outfielder_block <dbl>, blocked_cross <dbl>,
## # shield_ball_oop <dbl>, offside_provoked <dbl>, ontarget_scoring_att <dbl>,
## # passes_right <dbl>, att_ibox_target <dbl>, att_bx_centre <dbl>,
## # att_sv_low_centre <dbl>, …
# Per-game rows for Hope Solo.
filter(adv_player_stats, player_name == "Hope Solo")
## # A tibble: 8 x 269
## game_id status team_id first_name last_name person_id shirt_number position
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 seattl… home SEA Hope Solo 10022 1 Goalkee…
## 2 sky-bl… away SEA Hope Solo 10022 1 Goalkee…
## 3 seattl… home SEA Hope Solo 10022 1 Goalkee…
## 4 seattl… home SEA Hope Solo 10022 1 Goalkee…
## 5 boston… away SEA Hope Solo 10022 1 Goalkee…
## 6 orland… away SEA Hope Solo 10022 1 Goalkee…
## 7 kansas… away SEA Hope Solo 10022 1 Goalkee…
## 8 seattl… home SEA Hope Solo 10022 1 Goalkee…
## # … with 261 more variables: position_side <chr>, game_started <dbl>,
## # mins_played <dbl>, formation_place <dbl>, total_sub_on <dbl>,
## # total_sub_off <dbl>, player_off_id <chr>, player_on_id <chr>,
## # sub_position <chr>, leftside_pass <dbl>, accurate_keeper_sweeper <dbl>,
## # accurate_pass <dbl>, total_final_third_passes <dbl>, rightside_pass <dbl>,
## # attempts_conceded_ibox <dbl>, touches <dbl>, total_fwd_zone_pass <dbl>,
## # keeper_pick_up <dbl>, att_assist_openplay <dbl>,
## # accurate_fwd_zone_pass <dbl>, saves <dbl>, attempts_conceded_obox <dbl>,
## # ball_recovery <dbl>, turnover <dbl>, poss_won_def_3_rd <dbl>,
## # accurate_back_zone_pass <dbl>, successful_open_play_pass <dbl>,
## # total_back_zone_pass <dbl>, total_long_balls <dbl>,
## # accurate_keeper_throws <dbl>, goal_kicks <dbl>, open_play_pass <dbl>,
## # total_pass <dbl>, total_launches <dbl>, fwd_pass <dbl>,
## # ontarget_att_assist <dbl>, long_pass_own_to_opp <dbl>,
## # total_keeper_sweeper <dbl>, successful_final_third_passes <dbl>,
## # keeper_throws <dbl>, accurate_launches <dbl>, poss_lost_all <dbl>,
## # accurate_long_balls <dbl>, clean_sheet <dbl>, accurate_goal_kicks <dbl>,
## # saved_obox <dbl>, unsuccessful_touch <dbl>, poss_lost_ctrl <dbl>,
## # final_third_entries <dbl>, long_pass_own_to_opp_success <dbl>,
## # total_att_assist <dbl>, shot_faced <dbl>, duel_lost <dbl>,
## # blocked_scoring_att <dbl>, poss_won_att_3_rd <dbl>, dispossessed <dbl>,
## # accurate_cross <dbl>, att_rf_total <dbl>, won_tackle <dbl>,
## # total_chipped_pass <dbl>, lost_corners <dbl>, total_scoring_att <dbl>,
## # total_throws <dbl>, att_obx_centre <dbl>, att_openplay <dbl>,
## # poss_won_mid_3_rd <dbl>, freekick_cross <dbl>, touches_in_opp_box <dbl>,
## # accurate_chipped_pass <dbl>, duel_won <dbl>, total_cross_nocorner <dbl>,
## # total_tackle <dbl>, passes_left <dbl>, total_cross <dbl>,
## # att_obox_blocked <dbl>, head_pass <dbl>, crosses_18_yard <dbl>,
## # accurate_cross_nocorner <dbl>, effective_clearance <dbl>,
## # won_corners <dbl>, interception <dbl>, attempted_tackle_foul <dbl>,
## # backward_pass <dbl>, interception_won <dbl>, pen_area_entries <dbl>,
## # accurate_throws <dbl>, fouls <dbl>, total_clearance <dbl>,
## # crosses_18_yardplus <dbl>, total_shots <dbl>,
## # effective_blocked_cross <dbl>, outfielder_block <dbl>, blocked_cross <dbl>,
## # shield_ball_oop <dbl>, offside_provoked <dbl>, ontarget_scoring_att <dbl>,
## # passes_right <dbl>, att_ibox_target <dbl>, att_bx_centre <dbl>,
## # att_sv_low_centre <dbl>, …
After some further investigation, we find that De Vanna has only played in 3 games, so her passing accuracy score does not give a complete picture of her skills as a footballer. Although her accuracy for those 3 games in 2016 is certainly notable, it is not entirely fair to compare them against Staab’s 24, or even Solo’s 8. I might place a filter on the number of entries so we can compare passing accuracy across players who have played many games and have therefore had their passing tested under many circumstances.
library(stringr)
# The last 10 characters of game_id are taken as the date and split on "-"
# into separate year, month and day columns (all character).
adv_player_stats <- adv_player_stats %>%
  mutate(date = str_sub(game_id, -10, -1)) %>%
  separate(date, into = c("year", "month", "day"), sep = "-")
# Per-game passing accuracy for one player.
#   stats         per-game table with player_name, passing_acc and year
#   target_player full name to match exactly against player_name
# Returns the columns passing_acc, player_name and year for that player's
# games, with games lacking a passing accuracy removed. select() replaces
# the multi-row summarize(), which newer dplyr releases deprecate.
player_passing_acc <- function(stats, target_player) {
  stats %>%
    # `!!` forces the argument's value so a column of the same name in
    # `stats` can never shadow it.
    filter(player_name == !!target_player) %>%
    drop_na(passing_acc) %>%
    select(passing_acc, player_name, year)
}
# mallory pugh
mallory_pugh_passacc <- player_passing_acc(adv_player_stats, "Mallory Pugh")
# sophia smith not in this NWSL database?
sophia_smith_passacc <- player_passing_acc(adv_player_stats, "Sophia Smith")
# lynn williams
lynn_williams_passacc <- player_passing_acc(adv_player_stats, "Lynn Williams")
# megan rapinoe
megan_rapinoe_passacc <- player_passing_acc(adv_player_stats, "Megan Rapinoe")
# carli lloyd
carli_lloyd_passacc <- player_passing_acc(adv_player_stats, "Carli Lloyd")
# Per-game passing accuracy for the four USWNT forwards.
# Membership must be tested with %in%: `==` against a length-4 vector
# recycles the names down the rows, so a game only matched when its row
# position happened to line up with the right name and most games were
# silently dropped.
uswnt_fw_passacc <- adv_player_stats %>%
  filter(
    player_name %in%
      c("Mallory Pugh", "Lynn Williams", "Carli Lloyd", "Megan Rapinoe")
  ) %>%
  drop_na(passing_acc) %>%
  select(passing_acc, player_name, year)
uswnt_fw_passacc
## # A tibble: 60 x 3
## passing_acc player_name year
## <dbl> <chr> <chr>
## 1 0.743 Carli Lloyd 2016
## 2 0.5 Lynn Williams 2016
## 3 0.5 Lynn Williams 2016
## 4 0.727 Carli Lloyd 2016
## 5 0.75 Megan Rapinoe 2016
## 6 0.5 Lynn Williams 2016
## 7 0.933 Carli Lloyd 2016
## 8 0.639 Megan Rapinoe 2016
## 9 0.542 Lynn Williams 2016
## 10 0.429 Lynn Williams 2016
## # … with 50 more rows
# Export the forwards' per-game passing accuracy to the working directory.
write.csv(uswnt_fw_passacc, "uswnt_fw_passacc.csv")
# Goals per minute by season for one player, ordered by season.
#   stats         season table with player.x, season, gls and min
#   target_player full name to match exactly against player.x
# Returns an ungrouped table with player.x, season and goals_per_min.
season_goals_per_min <- function(stats, target_player) {
  stats %>%
    # `!!` forces the argument's value so a column of the same name in
    # `stats` can never shadow it.
    filter(player.x == !!target_player) %>%
    mutate(goals_per_min = gls / min) %>%
    arrange(season) %>%
    select(player.x, season, goals_per_min)
}
megan_rapinoe_season_goals_per_min <-
  season_goals_per_min(player_season_stats, "Megan Rapinoe")
carli_lloyd_season_goals_per_min <-
  season_goals_per_min(player_season_stats, "Carli Lloyd")
lynn_williams_season_goals_per_min <-
  season_goals_per_min(player_season_stats, "Lynn Williams")
mal_pugh_season_goals_per_min <-
  season_goals_per_min(player_season_stats, "Mallory Pugh")
# Stack the four tables. rbind() has no `by` argument: the original
# `by = "season"` was treated as one more object to bind rather than as a
# key, so it is dropped and bind_rows() is used instead.
uswnt_fw_goals_per_min <- bind_rows(
  megan_rapinoe_season_goals_per_min,
  carli_lloyd_season_goals_per_min,
  lynn_williams_season_goals_per_min,
  mal_pugh_season_goals_per_min
)
write.csv(uswnt_fw_goals_per_min, "uswnt_fw_season_goals_per_min.csv")
# Mean successful final-third passes per game for the four USWNT forwards.
# %in% is required here: `==` against a length-4 vector recycles the names
# down the rows and keeps only the games whose row position happens to line
# up with the right name, so the means were computed on a fraction of the
# games.
uswnt_fw_final_third_passes <- adv_player_stats %>%
  filter(
    player_name %in%
      c("Mallory Pugh", "Lynn Williams", "Carli Lloyd", "Megan Rapinoe")
  ) %>%
  group_by(player_name) %>%
  summarise(mean(successful_final_third_passes))
uswnt_fw_final_third_passes
## # A tibble: 4 x 2
## player_name `mean(successful_final_third_passes)`
## * <chr> <dbl>
## 1 Carli Lloyd 10.2
## 2 Lynn Williams 6.07
## 3 Mallory Pugh 8.29
## 4 Megan Rapinoe 10.6
# Per-game attacking stats for the four USWNT forwards, ordered by player.
# - %in% replaces `==`, which recycled the four names down the rows and
#   silently dropped most of each player's games.
# - arrange() + select() replace the grouped multi-row summarise(), which
#   newer dplyr releases deprecate; the result is ungrouped.
uswnt_fw_nswl_stats <- adv_player_stats %>%
  filter(
    player_name %in%
      c("Mallory Pugh", "Carli Lloyd", "Lynn Williams", "Megan Rapinoe")
  ) %>%
  mutate(
    forward_zone_pass_acc = accurate_fwd_zone_pass / total_fwd_zone_pass
  ) %>%
  arrange(player_name) %>%
  select(
    player_name, year, mins_played, goals, big_chance_missed, goal_assist,
    total_offside, forward_zone_pass_acc, turnover, ontarget_att_assist,
    ontarget_scoring_att, successful_final_third_passes, leftside_pass,
    rightside_pass
  )
# Per-game averages for the four USWNT forwards, one row per player and year.
# Two defects are fixed here:
# - `==` against a length-4 vector recycled the names down the rows and
#   dropped most games; %in% tests membership properly.
# - group_by(year) followed by group_by(player_name) discarded the year
#   grouping, so each year repeated the player's all-years average. Both
#   keys are now grouped together and summarised directly.
# The forward-zone pass accuracy average was disabled in the original and is
# still left out. The result stays grouped by player_name.
avg_uswnt_fw_nswl_stats <- adv_player_stats %>%
  filter(
    player_name %in%
      c("Mallory Pugh", "Carli Lloyd", "Lynn Williams", "Megan Rapinoe")
  ) %>%
  group_by(player_name, year) %>%
  summarise(
    avg_mins_played = mean(mins_played),
    avg_turnover = mean(turnover),
    avg_goals = mean(goals),
    avg_big_chance_missed = mean(big_chance_missed),
    avg_goal_assist = mean(goal_assist),
    avg_total_offside = mean(total_offside),
    avg_ontarget_att_assist = mean(ontarget_att_assist),
    avg_ontarget_scoring_att = mean(ontarget_scoring_att),
    avg_successful_final_third_passes = mean(successful_final_third_passes),
    avg_left_pass = mean(leftside_pass),
    avg_right_pass = mean(rightside_pass),
    .groups = "drop_last"
  )
avg_uswnt_fw_nswl_stats
## # A tibble: 54 x 13
## # Groups: player_name [4]
## player_name year avg_mins_played avg_turnover avg_goals avg_big_chance_…
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Carli Lloyd 2016 85 2.09 0.364 0.455
## 2 Carli Lloyd 2017 85 2.09 0.364 0.455
## 3 Carli Lloyd 2018 85 2.09 0.364 0.455
## 4 Carli Lloyd 2018 85 2.09 0.364 0.455
## 5 Carli Lloyd 2018 85 2.09 0.364 0.455
## 6 Carli Lloyd 2018 85 2.09 0.364 0.455
## 7 Carli Lloyd 2018 85 2.09 0.364 0.455
## 8 Carli Lloyd 2018 85 2.09 0.364 0.455
## 9 Carli Lloyd 2018 85 2.09 0.364 0.455
## 10 Carli Lloyd 2019 85 2.09 0.364 0.455
## # … with 44 more rows, and 7 more variables: avg_goal_assist <dbl>,
## # avg_total_offside <dbl>, avg_ontarget_att_assist <dbl>,
## # avg_ontarget_scoring_att <dbl>, avg_successful_final_third_passes <dbl>,
## # avg_left_pass <dbl>, avg_right_pass <dbl>
# Export the forwards' per-game stats (uswnt_fw_nswl_stats, built earlier in
# the script) to the working directory.
write.csv(uswnt_fw_nswl_stats, "uswnt_fw_nswl_stats.csv")
# Megan Rapinoe's per-game averages, one row per year.
# The averages are computed directly in summarise() rather than being
# written onto every game row with mutate() and then collapsed with a
# multi-row summarise() plus distinct(). The forward-zone pass accuracy
# average was disabled in the original and is still left out.
megan_rapinoe_annual_nswl_stats <- adv_player_stats %>%
  filter(player_name == "Megan Rapinoe") %>%
  group_by(year) %>%
  summarise(
    avg_mins_played = mean(mins_played),
    avg_turnover = mean(turnover),
    avg_goals = mean(goals),
    avg_big_chance_missed = mean(big_chance_missed),
    avg_goal_assist = mean(goal_assist),
    avg_total_offside = mean(total_offside),
    avg_ontarget_att_assist = mean(ontarget_att_assist),
    avg_ontarget_scoring_att = mean(ontarget_scoring_att),
    avg_successful_final_third_passes = mean(successful_final_third_passes),
    avg_left_pass = mean(leftside_pass),
    avg_right_pass = mean(rightside_pass)
  ) %>%
  # Keep the by-year grouping that this table has always carried.
  group_by(year)
megan_rapinoe_annual_nswl_stats
## # A tibble: 4 x 12
## # Groups: year [4]
## year avg_mins_played avg_turnover avg_goals avg_big_chance_… avg_goal_assist
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016 41 1 0.2 0 0.2
## 2 2017 88.8 2.47 0.706 0.0588 0.0588
## 3 2018 82.6 2.88 0.412 0.176 0.353
## 4 2019 75.3 3.17 0 0.333 0
## # … with 6 more variables: avg_total_offside <dbl>,
## # avg_ontarget_att_assist <dbl>, avg_ontarget_scoring_att <dbl>,
## # avg_successful_final_third_passes <dbl>, avg_left_pass <dbl>,
## # avg_right_pass <dbl>
# write.csv() cannot create directories, so make sure FW/ exists first
# (showWarnings = FALSE keeps this quiet when it is already there).
dir.create("FW", showWarnings = FALSE)
write.csv(megan_rapinoe_annual_nswl_stats,"FW/megan_rapinoe_annual_nswl_stats.csv")
#
# Yearly per-game averages over every row whose position is "Striker".
# Each average is first written onto the game rows and then collapsed to one
# value per year; the summary columns keep their auto-generated
# `mean(avg_...)` names. The forward-zone pass accuracy average is disabled.
starters_fw_nswl_stats <- adv_player_stats %>%
  filter(position == "Striker") %>%
  mutate(
    forward_zone_pass_acc = accurate_fwd_zone_pass / total_fwd_zone_pass
  ) %>%
  group_by(year) %>%
  mutate(
    avg_mins_played = mean(mins_played),
    avg_turnover = mean(turnover),
    avg_goals = mean(goals),
    avg_big_chance_missed = mean(big_chance_missed),
    avg_goal_assist = mean(goal_assist),
    avg_total_offside = mean(total_offside),
    avg_ontarget_att_assist = mean(ontarget_att_assist),
    avg_ontarget_scoring_att = mean(ontarget_scoring_att),
    avg_successful_final_third_passes = mean(successful_final_third_passes),
    avg_left_pass = mean(leftside_pass),
    avg_right_pass = mean(rightside_pass)
  ) %>%
  summarise(
    mean(avg_mins_played),
    mean(avg_turnover),
    mean(avg_goals),
    mean(avg_big_chance_missed),
    mean(avg_goal_assist),
    mean(avg_total_offside),
    mean(avg_ontarget_att_assist),
    mean(avg_ontarget_scoring_att),
    mean(avg_successful_final_third_passes),
    mean(avg_left_pass),
    mean(avg_right_pass)
  )
write.csv(distinct(starters_fw_nswl_stats), "starters_avg_fw_nwsl_stats.csv")
starters_fw_nswl_stats
## # A tibble: 4 x 12
## year `mean(avg_mins_… `mean(avg_turno… `mean(avg_goals… `mean(avg_big_c…
## * <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2016 81.7 1.84 0.272 0.127
## 2 2017 81.6 2.58 0.298 0.229
## 3 2018 81.3 2.81 0.271 0.409
## 4 2019 81.7 2.68 0.298 0.375
## # … with 7 more variables: `mean(avg_goal_assist)` <dbl>,
## # `mean(avg_total_offside)` <dbl>, `mean(avg_ontarget_att_assist)` <dbl>,
## # `mean(avg_ontarget_scoring_att)` <dbl>,
## # `mean(avg_successful_final_third_passes)` <dbl>,
## # `mean(avg_left_pass)` <dbl>, `mean(avg_right_pass)` <dbl>
# Slim per-game table: player name, passing accuracy and year.
# select() is the direct way to keep columns; the multi-row summarize() used
# before returns the same rows but is deprecated in newer dplyr releases.
year_by_year_passing_accuracy <- adv_player_stats %>%
  select(player_name, passing_acc, year)
year_by_year_passing_accuracy
## # A tibble: 15,696 x 3
## player_name passing_acc year
## <chr> <dbl> <chr>
## 1 Michelle Betos 0.854 2016
## 2 Meghan Klingenberg 0.857 2016
## 3 Emily Menges 0.906 2016
## 4 Emily Sonnett 0.851 2016
## 5 Katherine Reynolds 0.75 2016
## 6 Amandine Henry 0.868 2016
## 7 Allie Long 0.910 2016
## 8 Tobin Heath 0.889 2016
## 9 Lindsey Horan 0.896 2016
## 10 Nadia Nadim 0.524 2016
## # … with 15,686 more rows
# Mallory Pugh's per-game passing accuracy by year: points plus boxplots.
mal_pugh_pass_acc <- year_by_year_passing_accuracy %>%
  filter(player_name == "Mallory Pugh") %>%
  group_by(year) %>%
  ggplot(aes(x = year, y = passing_acc)) +
  geom_point(color = "red", shape = "star") +
  geom_boxplot(color = "blue") +
  labs(
    y = "passing accuracy",
    title = "Mallory Pugh's NWSL Passing Accuracy"
  )
ggplotly(mal_pugh_pass_acc)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
# Carli Lloyd's per-game passing accuracy by year: points plus boxplots.
ggplotly(
  year_by_year_passing_accuracy %>%
    filter(player_name == "Carli Lloyd") %>%
    group_by(year) %>%
    ggplot(aes(x = year, y = passing_acc)) +
    geom_point(color = "red", shape = "star") +
    geom_boxplot(color = "blue") +
    labs(
      y = "passing accuracy",
      title = "Carli Lloyd's NWSL Passing Accuracy"
    )
)
# Rose Lavelle's per-game passing accuracy by year: points plus boxplots.
ggplotly(
  year_by_year_passing_accuracy %>%
    filter(player_name == "Rose Lavelle") %>%
    group_by(year) %>%
    ggplot(aes(x = year, y = passing_acc)) +
    geom_point(color = "red", shape = "star") +
    geom_boxplot(color = "blue") +
    labs(
      y = "passing accuracy",
      title = "Rose Lavelle's NWSL Passing Accuracy"
    )
)
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
# Alex Morgan's per-game passing accuracy by year: points plus boxplots.
ggplotly(
  year_by_year_passing_accuracy %>%
    filter(player_name == "Alex Morgan") %>%
    group_by(year) %>%
    ggplot(aes(x = year, y = passing_acc)) +
    geom_point(color = "red", shape = "star") +
    geom_boxplot(color = "blue") +
    labs(
      y = "passing accuracy",
      title = "Alex Morgan's NWSL Passing Accuracy"
    )
)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
# Christen Press's mean per-game passing accuracy for each year.
summarize(
  group_by(
    filter(year_by_year_passing_accuracy, player_name == "Christen Press"),
    year
  ),
  mean(passing_acc)
)
## # A tibble: 4 x 2
## year `mean(passing_acc)`
## * <chr> <dbl>
## 1 2016 0.690
## 2 2017 0.715
## 3 2018 0.658
## 4 2019 0.722
# Per-game rows for four players, one table each, matched by full name.
alyssa_naeher <- filter(adv_player_stats, player_name == "Alyssa Naeher")
hope_solo <- filter(adv_player_stats, player_name == "Hope Solo")
ashlyn_harris <- filter(adv_player_stats, player_name == "Ashlyn Harris")
adrianna_franch <- filter(adv_player_stats, player_name == "Adrianna Franch")